#nbi:hide_in
from IPython.display import display, Markdown, clear_output
from ipywidgets import interact, ButtonStyle
import ipywidgets as widgets
import os
import pandas as pd
import numpy as np
#nbi:hide_in
# Banner images for the page header (relative image paths).
gmbr_mudah = widgets.HTML(value='<img src="mudah.png">', placeholder='', description='')
#nbi:hide_in
gmbr_scam = widgets.HTML(value='<img src="Suspicous Ads.png">', placeholder='', description='')
#nbi:hide_in
# "Help" and "About Us" buttons; both are tinted light blue through ButtonStyle
# and leave the predefined button_style empty.
helpbtn = widgets.Button(
    style=ButtonStyle(button_color='lightblue'),
    description='Help',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
)
#nbi:hide_in
aboutbtn = widgets.Button(
    style=ButtonStyle(button_color='lightblue'),
    description='About Us',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
)
#nbi:hide_in
# Tagline shown under the banner. Every <b> is explicitly closed so that only
# the intended words are rendered bold and the markup stays well-formed.
str_header1 = widgets.HTML(
    value="Beware of <b>SCAMMER</b>, buy from the <b>TRUSTED</b> one",
    placeholder='',
    description='',
)
#nbi:hide_in
# Application title.
str_scammer = widgets.HTML(
    value="<b>Scammer Detector</b>",
    placeholder='',
    description='',
)
#nbi:hide_in
# Page header: the two banner images side by side, followed by the Help/About
# buttons stacked vertically.
header = widgets.HBox([gmbr_mudah, gmbr_scam, widgets.VBox([helpbtn, aboutbtn])])
#nbi:hide_in
# Free-text field for a listing URL.
meurl = widgets.Text(
    value='',
    placeholder='Paste your mudah.my link here',
    description='Link:',
    disabled=False,
)
#nbi:hide_in
# Car models offered in the scrape form; the first entry is pre-selected.
type_car = ['Perodua Kancil', 'Perodua Myvi', 'Honda Accord', 'Honda Civic', 'Toyota Vios']
car_dropdown = widgets.Dropdown(
    options=type_car,
    value=type_car[0],
    description='Type of Car:',
    disabled=False,
)
#nbi:hide_in
# Locations offered in the scrape form; the first entry is pre-selected.
list_location = ['Selangor', 'Kuala Lumpur']
location_dropdown = widgets.Dropdown(
    options=list_location,
    value=list_location[0],
    description='Location:',
    disabled=False,
)
#nbi:hide_in
# Number of result pages to scrape, offered as the strings '1' .. '10'.
pageslist = [str(page_no) for page_no in range(1, 11)]
pages_dropdown = widgets.Dropdown(
    options=pageslist,
    value=pageslist[0],
    description='Num Pages:',
    disabled=False,
)
#nbi:hide_in
# The three scrape parameters laid out on one row.
scrap_param = widgets.HBox([car_dropdown, location_dropdown, pages_dropdown])
#nbi:hide_in
# Folder in which the scraped dataset will be saved.
fpath = widgets.Text(
    value='',
    placeholder='Save Scraped Dataset',
    description='Folder Path:',
    disabled=False,
)
#nbi:hide_in
# Base name (without extension) of the scraped CSV file.
fname = widgets.Text(
    value='',
    placeholder='Name your scrap csv file',
    description='CSV file:',
    disabled=False,
)
# Full path of the CSV to write: <folder>/<name>.csv.
# NOTE: this is computed once from the widgets' current values (both start
# empty), so it must be re-evaluated after the user has filled in the fields.
save_csvname = fname.value + '.csv'
save_folderpath = os.path.join(fpath.value, save_csvname)
save_folderpath
#nbi:hide_in
# Folder that holds the cleaned dataset to load.
lpath = widgets.Text(
    value='',
    placeholder='Upload cleaned dataset',
    description='Folder Path:',
    disabled=False,
)
#nbi:hide_in
# Base name (without extension) of the cleaned CSV file.
lname = widgets.Text(
    value='',
    placeholder='Name of cleaned csv file',
    description='CSV file:',
    disabled=False,
)
# Full path of the cleaned CSV: <folder>/<name>.csv, built from the "load"
# widgets (lpath / lname) rather than the "save" widgets.
# NOTE: this is computed once from the widgets' current values (both start
# empty), so it must be re-evaluated after the user has filled in the fields.
clean_csvname = lname.value + '.csv'
clean_folderpath = os.path.join(lpath.value, clean_csvname)
clean_folderpath
#nbi:hide_in
# Example values shown next to the input fields. The <i>/<b> pairs are closed
# in reverse order of opening so the markup is well-formed HTML.
str_link = widgets.HTML(
    value="<i><b>https://www.mudah.my/malaysia/cars-for-sale?o=1&q=&th=1</b></i>",
    placeholder='',
    description='Sample Link:',
)
#nbi:hide_in
str_folder = widgets.HTML(
    value="<i><b>C:\\Users\\LENOVO\\Documents</b></i>",
    placeholder='',
    description='Sample Path:',
)
#nbi:hide_in
str_csv = widgets.HTML(
    value="<i><b>kereta</b></i>",
    placeholder='',
    description='Sample CSV:',
)
#nbi:hide_in
# Caption above the data-preview controls.
str_view = widgets.HTML(
    value="<b>Choose how to view your data</b>",
    placeholder='',
    description='',
)
#nbi:hide_in
# Number of rows to preview, offered as the strings '5' .. '10'.
viewlist_ = [str(row_count) for row_count in range(5, 11)]
view_dropdown = widgets.Dropdown(
    options=viewlist_,
    value=viewlist_[0],
    description='Num Rows:',
    disabled=False,
)
#nbi:hide_in
# Which part of the table to preview; the order of this list is relied on by
# view(), which compares against view_by[0], view_by[1] and view_by[2].
view_by = ['Head', 'Random', 'Tail']
viewby_dropdown = widgets.Dropdown(
    options=view_by,
    value=view_by[0],
    description='By:',
    disabled=False,
)
#nbi:hide_in
# Button that triggers the preview.
viewby_btn = widgets.Button(
    description='View',
    disabled=False,
    button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click Me',
    icon='',
)
#nbi:hide_in
# Import the cleaned data file.
# C:\Users\LENOVO\Documents\fyp fathiah\Program\Test Case\dataset
# The working directory is switched to the dataset folder, so the relative
# file names used below (and any later relative paths) resolve inside it.
os.chdir(r"C:\Users\LENOVO\Documents\fyp fathiah\Program\Test Case\dataset")
# Cell contents that pandas should read as missing values.
# NOTE(review): "0" is in this list, so literal zeros are loaded as NaN —
# confirm that this is intended.
missing_values = ["n/a", "na", "-", "0"]
#filepath_or_buffer = "ToyotaVios KL & SELANGORNUMCOL.csv"
filepath_or_buffer = "HondaAccord KL & SELANGOR.csv"
# Module-level dataset used by view(), the PCA steps and data_label().
df = pd.read_csv(filepath_or_buffer, na_values = missing_values)
#nbi:hide_in
def view(obj, n):
    """Print the selected mode and display ``n`` rows of the module-level ``df``.

    ``obj`` is compared against the entries of ``view_by``: the first entry
    shows the first rows, the second a random sample and the third the last
    rows. Any other value is ignored and nothing is printed or displayed.
    """
    if obj == view_by[0]:
        pick_rows = df.head
    elif obj == view_by[1]:
        pick_rows = df.sample
    elif obj == view_by[2]:
        pick_rows = df.tail
    else:
        return
    print(obj)
    display(pick_rows(n))
#nbi:hide_in
# Output area that receives whatever the "View" button renders.
view_output = widgets.Output()


def on_button_clicked(_):
    """Refresh ``view_output`` with the preview selected in the dropdowns."""
    with view_output:
        # Remove the previous preview before drawing the new one.
        clear_output()
        mode = viewby_dropdown.value
        row_count = int(view_dropdown.value)
        view(mode, row_count)


# Register the handler on the "View" button.
viewby_btn.on_click(on_button_clicked)
#nbi:hide_in
# Preview panel: the two dropdowns on one row, then the button and its output.
view_by_header = widgets.VBox([
    widgets.HBox([viewby_dropdown, view_dropdown]),
    widgets.VBox([viewby_btn, view_output]),
])
view(viewby_dropdown.value, int(view_dropdown.value))
df.sample(5)
#nbi:hide_in
# Captions for the three evaluation panels. Every <b> is explicitly closed so
# the markup is well-formed HTML.
str_pca = widgets.HTML(
    value="Apply <b>Data Mining</b> Method!!",
    placeholder='',
    description='',
)
#nbi:hide_in
str_box = widgets.HTML(
    value="<b>Choose to view and compare by attributes</b>",
    placeholder='',
    description='',
)
#nbi:hide_in
str_result = widgets.HTML(
    value="<b>Click to view Potential Scammers!!</b>",
    placeholder='',
    description='',
)
#nbi:hide_in
# Button that triggers the PCA scatter plot.
pcabtn = widgets.Button(
    style=ButtonStyle(button_color='orange'),
    description='PCA!',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check',
)
#bscrap = widgets.Button(description='Scrap Me')
def pca_plot(FinalData):
    """Draw the first two columns of ``FinalData`` as an inline scatter plot.

    FinalData: 2-D array; column 0 goes on the x axis ("component 1") and
        column 1 on the y axis ("component 2").

    The hover text of each point is taken from the module-level ``mytexts``
    list, so that list must already exist when this function is called.
    """
    # Only the plotly names that are actually used are imported here.
    import plotly.graph_objs as go
    from plotly.offline import iplot, init_notebook_mode

    init_notebook_mode()
    trace = go.Scatter(
        x=FinalData[:, 0],
        y=FinalData[:, 1],
        mode="markers",
        marker=dict(size=10, line=dict(width=1), color="pink"),
        text=mytexts,
    )
    mylayout = go.Layout(
        title='2 Component PCA: ',
        xaxis=dict(title='component 1'),
        yaxis=dict(title='component 2'),
        hovermode='closest',
        autosize=False,
        width=1000,
        height=1000,
    )
    fig_comp = go.Figure(data=[trace], layout=mylayout)
    iplot(fig_comp)
pca_plot(FinalData)
#nbi:hide_in
# Output area that receives the plot drawn by the "PCA!" button.
pca_output = widgets.Output()


def on_button_clicked(_):
    """Redraw the PCA scatter plot inside ``pca_output``."""
    with pca_output:
        # Remove the previous plot before drawing again.
        clear_output()
        pca_plot(FinalData)


# Register the handler on the "PCA!" button.
pcabtn.on_click(on_button_clicked)
# Columns selected into `y` below (presumably the text/descriptive fields —
# confirm against the CSV).
strcol = ['Name', 'Mileage', 'CC', 'Condition', 'Link']
# Columns selected into `x` below; `x` is the input of the PCA steps.
datacol = ['Price', 'Manufactured Year', 'NewMil']
x = df[datacol]
# `y` is not referenced again in the visible part of this file.
y = df[strcol]
#r = len(df2) #all rows in d2(cleaned dataset)
#c = len(x.columns) #numcols only
#1.FORMING MATRIX D(RowDataAdjust)
def step1(r, c, x):
    """Return the mean-adjusted copy of the first ``r`` rows / ``c`` columns of ``x``.

    r: number of rows to process.
    c: number of columns to process.
    x: DataFrame of numeric columns; every column mean is taken over the
        whole column of ``x``.

    Returns an ``(r, c)`` float array whose entries are ``x[i, j] - mean(column j)``.
    """
    column_means = [x.iloc[:, k].mean() for k in range(len(x.columns))]
    centered = np.zeros((r, c))
    for col in range(c):
        col_mean = column_means[col]
        for row in range(r):
            centered[row][col] = x.iloc[row, col] - col_mean
    return centered
#2. FORMING COVARIANCE MATRIX
def step2(r, matrix):
    """Return the covariance matrix of mean-adjusted data.

    r: number of observations (rows of ``matrix``); the divisor is ``r - 1``.
    matrix: 2-D array of mean-adjusted data, one row per observation.

    Returns ``matrix.T · matrix / (r - 1)``. Raises ZeroDivisionError when
    ``r`` is 1.
    """
    scaled_transpose = np.dot((1 / (r - 1)), matrix.T)
    # Multiply by the argument itself so the result depends only on the inputs
    # and not on a module-level array.
    return np.dot(scaled_transpose, matrix)
#3. CALCULATE EIGENVALUES, EIGENVECTORS
def step3_4(r, matrix):
    """Return the two leading eigenvectors of ``matrix`` as a feature matrix.

    r: number of eigenvector rows to keep (the callers pass the number of
        data columns).
    matrix: square matrix to decompose, e.g. a covariance matrix.

    Returns an ``(r, 2)`` float array: column 0 is the eigenvector of the
    largest eigenvalue, column 1 that of the second largest. Raises
    ValueError when ``matrix`` provides fewer than ``r`` rows or fewer than
    two eigenvectors.
    """
    import scipy.linalg as la

    # Decompose once; column k of eigvecs belongs to eigvals[k].
    eigvals, eigvecs = la.eig(np.array(matrix))
    # Order the eigenvectors by descending eigenvalue.
    order = eigvals.argsort()[::-1]
    eigvecs = eigvecs[:, order]
    # Keep the first r rows of the two leading eigenvectors. Only the real
    # part is stored because the result array holds floats.
    feature_vec = np.zeros((r, 2))
    feature_vec[:, :] = np.real(eigvecs[:r, :2])
    return feature_vec
def step5(matrix1, matrix2):
    """Project data onto the feature vectors.

    matrix1: feature-vector matrix, one eigenvector per column.
    matrix2: mean-adjusted data, one observation per row.

    Returns ``(matrix1.T · matrix2.T).T``, i.e. one projected observation per
    row and one component per column.
    """
    projected = np.dot(matrix1.T, matrix2.T)
    return projected.T
# Run the PCA steps on the numeric columns `x` of the loaded dataset.
# Step 1: subtract each column mean from its column.
MeanAdjustedData = step1(len(df), len(x.columns), x)
# Step 2: covariance matrix of the mean-adjusted data.
mycov = step2(len(df), MeanAdjustedData)
# Steps 3-4: eigenvectors of the two largest eigenvalues.
feature_vec = step3_4(len(x.columns), mycov)
# Step 5: project the data onto those two eigenvectors.
FinalData = step5(feature_vec, MeanAdjustedData)
NewFinal = pd.DataFrame(FinalData, columns=['PC1', 'PC2'])
NewFinal.head()
# The relative path means the file is written to the current working directory.
NewFinal.to_csv('FinalData_HondaAccord.csv', index=False, header=True)
def data_label(df):
    """Build one hover label per row of ``df``.

    df: DataFrame with the columns 'Price', 'Manufactured Year' and 'NewMil'.
        The values are converted with ``.astype(str)``, so they are expected
        to be NumPy scalars (numeric columns and a numeric index).

    Returns a list of strings such as
    ``"ID: 0, Price: 1000, Year: 2010, Mileage: 12500"``, in row order.
    """
    # Fetch every column once and walk all of them in lockstep.
    rows = zip(
        df.index.values,
        df['Price'].values,
        df['Manufactured Year'].values,
        df['NewMil'].values,
    )
    mytexts = []
    for row_id, price, year, mileage in rows:
        parts = [
            "ID: " + row_id.astype(str),
            "Price: " + price.astype(str),
            "Year: " + year.astype(str),
            "Mileage: " + mileage.astype(str),
        ]
        mytexts.append(", ".join(parts))
    return mytexts
# Hover labels for the scatter plot, one per row of the dataset.
mytexts = data_label(df)
mytexts[0]
#6 PLOTTING PCA SCATTER PLOT
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode, download_plotlyjs, plot
# NOTE(review): GraphWidget is not used in the visible part of this file, and
# plotly.widgets is reportedly unavailable in plotly >= 4 — confirm whether
# this import is still needed.
from plotly.widgets import GraphWidget
init_notebook_mode()
# One marker per observation: first component on x, second component on y.
trace = go.Scatter(x=FinalData[:,0], y=FinalData[:,1], mode="markers", marker = dict(size=10, line = dict(width=1) ,color="pink"), text= mytexts)
mydata = [trace]
mylayout = go.Layout(title='2 Component PCA: ', xaxis=dict(title='component 1'), yaxis=dict(title='component 2'), hovermode = 'closest', autosize=False, width=1000, height=1000)
fig_comp = go.Figure(data=mydata, layout=mylayout)
#iplot(fig_comp)
# plot() is used here instead of the inline iplot() call commented out above.
plot(fig_comp)
#nbi:hide_in
# Button that triggers the box-plot analysis.
boxplotbtn = widgets.Button(
    style=ButtonStyle(button_color='orange'),
    description='Boxplot',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check',
)
#nbi:hide_in
# Attributes the box plot can be filtered by; 'Price' is pre-selected and all
# three options are visible at once.
boxplot_filter = widgets.SelectMultiple(
    options=['Price', 'Year', 'Mileage'],
    value=['Price'],
    rows=3,
    description='Filter by',
    disabled=False,
)
#nbi:hide_in
# Button that reveals the result panel.
scambtn = widgets.Button(
    style=ButtonStyle(button_color='red'),
    description='Click Me',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check',
)
#nbi:hide_in
#filepath=fpath.value + "\\" + fname.value + ".csv"
#filepath
#nbi:hide_in
#accordion.children[0].value
#nbi:hide_in
#import dataframe
import pandas as pd
import requests as req
#calling package URL lib
from urllib.request import urlopen as uReq
#parse HTML text
from bs4 import BeautifulSoup as soup
#filename = "df_mulpages.csv"
#filename = filepath
def scrapme(meurl, filepath):
    """Scrape one mudah.my listing page and save its ads as a CSV file.

    meurl: URL of the listing page to scrape. When it is empty, the first
        result page for Toyota Vios in Kuala Lumpur is scraped instead.
    filepath: path of the CSV file to write.

    The CSV has the columns Name, Price, Manufactured Year, Mileage, NewMil,
    CC, Condition and Link. ``NewMil`` holds the midpoint of the advertised
    mileage band; ``Price`` has the "RM" prefix and inner whitespace removed.
    Prints a confirmation line when the file has been written.
    """
    filename = filepath
    default_url = 'https://www.mudah.my/kuala-lumpur/cars-for-sale/toyota/vios?o=1&q=&so=1&f=p&th=1'

    def _spaced(number):
        # Band labels use a space as the thousands separator, e.g. "14 999".
        return '{:,}'.format(number).replace(',', ' ')

    # Mileage bands as (first lower bound, end of range, band width):
    # 5 000-wide bands below 100 000, 10 000-wide up to 200 000 and
    # 50 000-wide up to 500 000. Each band label maps to its midpoint.
    band_layout = ((0, 100000, 5000), (100000, 200000, 10000), (200000, 500000, 50000))
    midpoints = {}
    for first, end, width in band_layout:
        for low in range(first, end, width):
            label = _spaced(low) + ' - ' + _spaced(low + width - 1)
            midpoints[label] = str(low + width // 2)

    def subs(Mileage):
        # Known band -> midpoint; anything else is passed through unchanged.
        return midpoints.get(Mileage, Mileage)

    def dprice(a):
        # Remove all whitespace from a price that contains a space.
        if ' ' in a:
            return ''.join(a.split())
        return a

    pages = [meurl if meurl else default_url]

    container = []
    for page_url in pages:
        page = req.get(page_url)
        page_soup = soup(page.text, "html.parser")
        ads = page_soup.findAll("div", {"class": "listing_params_container"})
        for ad in ads:
            anchor = ad.div.div.a
            clink = anchor["href"]
            name = anchor["title"].strip()
            Price = ad.findAll("div", {"class": "ads_price"})[0].text.strip()
            # The icon labels appear in the order condition, year, mileage, CC.
            labels = ad.findAll("font", {"class": "icon_label"})
            Condition = labels[0].text.strip()
            Year = labels[1].text.strip()
            Mileage = labels[2].text.strip()
            CC = labels[3].text.strip()
            newMil = subs(Mileage).strip()
            nPrice = dprice(Price.replace("RM", "", 1))
            container.append((name, nPrice, Year, Mileage, newMil, CC, Condition, clink))

    df = pd.DataFrame(
        container,
        columns=['Name', 'Price', 'Manufactured Year', 'Mileage', 'NewMil', 'CC', 'Condition', 'Link'],
    )
    df.to_csv(filename, index=False, encoding='utf-8')
    print("Done scrap " + filename)
#nbi:hide_in
def scrap(mycar, mylocation, mypage, filepath):
    """Scrape private-seller car ads from mudah.my and save them as a CSV file.

    mycar: car name as shown in the form, e.g. 'Toyota Vios'; turned into a
        URL segment by ``car()``.
    mylocation: location name, e.g. 'Kuala Lumpur'; turned into a URL segment
        by ``location()``.
    mypage: number of result pages to scrape (anything ``int()`` accepts).
    filepath: path of the CSV file to write.

    The first result page is fetched to read the number of available pages
    from the last word of the listing title. When ``mypage`` exceeds that
    number, a message is printed and nothing is written. Otherwise the pages
    1..mypage are scraped into a CSV with the columns Name, Price,
    Manufactured Year, Mileage, NewMil, CC, Condition and Link. ``NewMil``
    holds the midpoint of the advertised mileage band; ``Price`` has the "RM"
    prefix and inner whitespace removed.
    """
    filename = filepath

    def _spaced(number):
        # Band labels use a space as the thousands separator, e.g. "14 999".
        return '{:,}'.format(number).replace(',', ' ')

    # Mileage bands as (first lower bound, end of range, band width):
    # 5 000-wide bands below 100 000, 10 000-wide up to 200 000 and
    # 50 000-wide up to 500 000. Each band label maps to its midpoint.
    band_layout = ((0, 100000, 5000), (100000, 200000, 10000), (200000, 500000, 50000))
    midpoints = {}
    for first, end, width in band_layout:
        for low in range(first, end, width):
            label = _spaced(low) + ' - ' + _spaced(low + width - 1)
            midpoints[label] = str(low + width // 2)

    def subs(Mileage):
        # Known band -> midpoint; anything else is passed through unchanged.
        return midpoints.get(Mileage, Mileage)

    def dprice(a):
        # Remove all whitespace from a price that contains a space.
        if ' ' in a:
            return ''.join(a.split())
        return a

    container = []
    pages = []

    # Parameter 1: location
    param1 = location(mylocation)
    car_sale = 'cars-for-sale' + '/'
    # Parameter 2: type of car
    param2 = car(mycar)
    # Parameter 3: number of pages
    n = int(mypage)
    user_seller = '&f=p'
    front_link = 'https://www.mudah.my/' + param1 + car_sale + param2 + 'o='
    end_link = '&q' + '&so=1' + user_seller + '&th=1'

    # Fetch page 1 to find out how many result pages exist. The connection is
    # closed even when reading fails.
    test_link = front_link + '1' + end_link
    uClient = uReq(test_link)
    try:
        page_html = uClient.read()
    finally:
        uClient.close()
    num_page_soup = soup(page_html, "html.parser")
    num_containers = num_page_soup.findAll("div", {"class": "listing_title"})
    str_pg = num_containers[0].h1.text
    # The page count is the last space-separated word of the title.
    max_pg = int(str_pg.split(' ')[-1])
    print('Max pg:', max_pg, 'Chosen pages:', n)

    if n > max_pg:
        print('Unable to scrap, the chosen number of pages (', n, ') exceeded the maximum page(s) available which is', max_pg)
        return

    print('if', n, max_pg)
    for i in range(1, n + 1, 1):
        print('first for loop')
        my_url = front_link + str(i) + end_link
        print('My_url:' + my_url)
        pages.append(my_url)

    for page_url in pages:
        page = req.get(page_url)
        page_soup = soup(page.text, "html.parser")
        ads = page_soup.findAll("div", {"class": "listing_params_container"})
        for ad in ads:
            anchor = ad.div.div.a
            clink = anchor["href"]
            name = anchor["title"].strip()
            Price = ad.findAll("div", {"class": "ads_price"})[0].text.strip()
            # The icon labels appear in the order condition, year, mileage, CC.
            labels = ad.findAll("font", {"class": "icon_label"})
            Condition = labels[0].text.strip()
            Year = labels[1].text.strip()
            Mileage = labels[2].text.strip()
            CC = labels[3].text.strip()
            newMil = subs(Mileage).strip()
            nPrice = dprice(Price.replace("RM", "", 1))
            container.append((name, nPrice, Year, Mileage, newMil, CC, Condition, clink))

    df = pd.DataFrame(
        container,
        columns=['Name', 'Price', 'Manufactured Year', 'Mileage', 'NewMil', 'CC', 'Condition', 'Link'],
    )
    df.to_csv(filename, index=False, encoding='utf-8')
    print("Done scrap " + filename)
#def scrap(car, location, page, filepath)
# Trial run of the scraper with the current dropdown selections; the result
# is written to the hard-coded CSV path below.
# NOTE(review): in this file's top-to-bottom order car() and location() are
# defined further down, so these calls rely on those definitions having been
# executed beforehand.
scrap(car_dropdown.value, location_dropdown.value, pages_dropdown.value, 'C:\\Users\\LENOVO\\Documents\\fyp fathiah\\Program\\Test1.csv')
car(car_dropdown.value)
location(location_dropdown.value)
#nbi:hide_in
def car(car):
    """Turn a car name into its lower-case URL segment.

    Spaces become '/' and a '?' is appended, e.g. 'Toyota Vios' ->
    'toyota/vios?'.
    """
    segment = car.replace(" ", "/")
    return (segment + "?").lower()
#nbi:hide_in
def location(location):
    """Turn a location name into its lower-case URL segment.

    Spaces become '-' and a '/' is appended, e.g. 'Kuala Lumpur' ->
    'kuala-lumpur/'. Prints 'y' when the name contained a space and 'n'
    otherwise.
    """
    has_space = ' ' in location
    segment = location.replace(" ", "-") if has_space else location
    print('y' if has_space else 'n')
    return (segment + "/").lower()
#nbi:hide_in
# "Scrap & Save" button together with the output area it writes to.
bscrap = widgets.Button(style=ButtonStyle(button_color='orange'), description='Scrap & Save')
oscrap = widgets.Output()


def on_button_clicked(_):
    """Report the button press inside ``oscrap``.

    The scrape itself is not wired in here: the handler only clears the
    output area and prints a confirmation message.
    """
    with oscrap:
        # Remove the previous message before printing again.
        clear_output()
        #scrapme(meurl.value, filepath)
        print('Me Scrapped!')


# Register the handler on the button.
bscrap.on_click(on_button_clicked)
#nbi:hide_in
#print("This is header")
# Bare widget names in this section are notebook cell outputs: they render the
# widget when they are the last expression of a cell.
header
#nbi:hide_in
# Decorative banner image (relative image path).
gmbr_kereta = widgets.HTML(
value='<img src="highway car1_trademark_scamtrust.png">',
placeholder='',
description='',
)
gmbr_kereta
#nbi:hide_in
#accordion1 = widgets.Accordion(children=[widgets.VBox([str_link, meurl]), widgets.VBox([str_folder, fpath, str_csv, fname]), widgets.HBox([scrap_dropdown, widgets.VBox([bscrap,oscrap])])])
# Accordion 1: section 0 holds the scrape parameters, section 1 the save
# location fields followed by the scrape button and its output.
accordion1 = widgets.Accordion(children=[scrap_param, widgets.VBox([str_folder, fpath, str_csv, fname, widgets.VBox([bscrap,oscrap])])])
accordion1.set_title(0, 'Extract')
accordion1.set_title(1, 'Save')
#accordion1.set_title(0, 'Source')
#accordion1.set_title(1, 'Save')
#accordion1.set_title(2, 'Scrap')
accordion1
#nbi:hide_in
# Box-plot button next to its attribute filter.
analysisbtn = widgets.HBox([boxplotbtn, boxplot_filter])
#analysisbtn
#nbi:hide_in
# Accordion 2: section 0 holds the fields locating the cleaned CSV, section 1
# the data preview panel. The sample-value widgets str_folder and str_csv are
# the same objects already placed in accordion1.
accordion2 = widgets.Accordion(children=[widgets.VBox([str_folder,lpath, str_csv, lname]), view_by_header])
accordion2.set_title(0, 'Upload')
accordion2.set_title(1, 'View')
accordion2
#nbi:hide_in
# Accordion 3: PCA output and button, box-plot analysis, and the result button.
accordion3 = widgets.Accordion(children=[widgets.VBox([str_pca,widgets.VBox([pca_output, pcabtn])]), widgets.VBox([str_box, analysisbtn]), widgets.VBox([str_result, scambtn])])
accordion3.set_title(0, 'Method')
accordion3.set_title(1, 'Analysis')
accordion3.set_title(2, 'Result')
accordion3
#nbi:hide_in
# Top-level navigation: one tab per workflow stage, each holding the accordion
# built for that stage.
tab_nest = widgets.Tab()
tab_nest.children = [accordion1, accordion2, accordion3]
tab_nest.set_title(0, '1. Extraction')
tab_nest.set_title(1, '2. Data')
tab_nest.set_title(2, '3. Evaluation')
print('Tab Nest')
display(tab_nest)